********************************************************************************
*	PROJECT: Childhood confidence and long-term outcomes (PSID)
*	PURPOSE: Make table 1, persistence of confidence measures 
*	PUBLISHED: August 2022
*	CONTACT: Hannah Ruebeck, hruebeck@mit.edu	
********************************************************************************

* Start from an empty session; maxvar can only be raised while no data are in memory
clear all
set maxvar 10000
set more off
pause on

* Root folder of the replication package (edit this line to match your machine)
local path "/Users/XXXXX/Downloads/replication materials"

* Sub-folders used below: do-files, cleaned data, and LaTeX table output
local DO    "`path'/code"
local CLEAN "`path'/clean"
local OUT   "`path'/out/tables"

/*
Description: This code makes table 1 (persistence of the main confidence measures). 
There are three main measures of confidence
		1. Math over-confidence (binary): math_overcnf
		2. Math under-confidence (binary): math_undrcnf 
		3. Degrees of over- and under-confidence ("continuous", in sd units): z_math_confidence_w0
*/



********************************************************************************
// Set up controls etc.

* Run the shared set-up do-file from the code folder.
* NOTE(review): presumably this is where the globals used below (the spec control
* lists, the cluster variable and extra_testscores) are defined -- confirm in that file.
do "`DO'/set_up_regressions.do"
* Drop any data the set-up file left in memory; each table row loads its own data set
clear 
 
********************************************************************************
// Programs

* get_pval: two-sided p-value and LaTeX significance stars for the ratio b/se.
*   b()       point estimate
*   se()      standard error; any parentheses around it are stripped first
*   df()      degrees of freedom; when given, the p-value uses the Student-t distribution
*   normal()  any non-empty value; when given, the p-value uses the standard normal
*   Exactly one of df() and normal() must be supplied, otherwise the program exits with r(198).
* Returns
*   r(star)   "" if p>.1 (or p is missing), one star if p<=.1, two if p<=.05, three if p<=.01,
*             each wrapped as a LaTeX math superscript
*   r(p)      the p-value itself (missing when se is zero or missing)
program get_pval, rclass
    syntax, b(string) se(string) [df(string) normal(string)]
	
	* exactly one reference distribution has to be chosen
	if "`df'"!="" & "`normal'"!="" {
	    di as error "df() is only specified for a t-distribution and cannot be jointly specified with normal"
		error 198
	}
	if "`df'"=="" & "`normal'"=="" {
	    di as error "one of normal() and df() must be specified"
		error 198
	}
	
	* callers may pass the standard error already formatted as "(0.123)"
    local se = subinstr("`se'", "(", "", .)
    local se = subinstr("`se'", ")", "", .)
	
	* two-sided p-value under the requested distribution
    if "`df'"!="" {
		local p = 2*ttail(`df',abs(`b'/`se'))
    }
    else {
		local p =  2*(1-normal(abs(`b'/`se')))
    }
	
	* star ladder: each stricter threshold overwrites the previous one.
	* A missing p-value satisfies none of the conditions and therefore gets no star.
	local star ""
	if `p' <=.1   local star "$^{*}$"
	if `p' <=.05  local star "$^{**}$"
	if `p' <=.01  local star "$^{***}$"
	
    return scalar p = `p'
    return local star "`star'"

end

* set_up_data: create short-named copies of the confidence variables in the data in memory.
*   s()  subject stem used inside the variable names (e.g. math)
*   w()  weight number appearing in the _w suffix of the variable names
* Variables prefixed c_fst_ are copied to the short names, variables prefixed sec_ to
* the later_ names, and the later_ variables are labelled.
program set_up_data 
	syntax, s(string) w(int)
	
	* common tail of the degrees-of-confidence variable names
	local stem "rsd_u_`s'cnf_ratesk7_w`w'"
	
	* degrees-of-confidence measure: first and second measurement
	gen `s'_confidence_w`w' = c_fst_`stem'
	gen later_`s'_confidence_w`w' = sec_`stem'
	
	* binary over- and under-confidence indicators: first and second measurement
	foreach kind in overcnf undrcnf {
		gen `s'_`kind' =  c_fst_`s'_`kind'_orig 
		gen later_`s'_`kind' =  sec_`s'_`kind'_orig 
	}
	
	* standardized versions: the first measurement is copied from the existing z variable,
	* the second measurement is standardized here with egen std()
	gen z_`s'_confidence_w`w' = c_fst_z_`stem'
	egen later_z_`s'_confidence_w`w' = std(sec_`stem')
	
	* labels of the later_ variables (read by write_ols_reg_table for its row labels)
	label var later_`s'_overcnf "over-confidence"
	label var later_`s'_undrcnf "under-confidence"
	label var later_`s'_confidence_w`w' "confidence (integers -6 to 6)"
	label var later_z_`s'_confidence_w`w' "confidence (standard deviation units)"
end

* write_ols_reg_table: regress the later measurement of each confidence variable on its
* childhood measurement under two control specifications and write a LaTeX table.
*   varlist()            childhood confidence variables, one table row block per variable;
*                        the outcome of each regression is the same name prefixed by later_
*   outgroup()           tag used in the output file name
*   outfolder()          folder that receives the .tex file
*   datafolder()         folder holding CDS_TAS_PSID_analysis_<datatype>.dta
*   subj()               math or read; selects the subject wording and the variable stems
*   datatype()           suffix of the analysis data set (e.g. 1year)
*   description()        outcome description inserted into the table notes
*   weightnum()          weight number; selects the weight variable, the control globals
*                        and the weighting sentence of the notes
*   extracontrols()      optional extra regressors added to both specifications
*   extracontrolsnote()  optional sentence describing them in the notes
* Output: <outfolder>/table_main_<outgroup>_w<weightnum>_<subj>.tex
* Requires the programs get_pval and set_up_data, the globals spec1_w#, spec2_w# and
* cluster, and the variable confidence_sample in the analysis data.
program write_ols_reg_table
    syntax, varlist(string) outgroup(string) outfolder(string) datafolder(string) subj(string) ///
			datatype(string) description(string)  weightnum(string) ///
			[extracontrols(string) extracontrolsnote(string)] 
        
	local weightvar "weight_confsamp`weightnum'"
			
	* subject wording: Subject starts each row label, subject is used in the notes
	if "`subj'"=="math" {
	    local Subject "Math"
		local subject "math"
	}
	if "`subj'"=="read" {
		local Subject "Reading"
		local subject "reading"
	}
	
	* sentence of the notes that describes the weighting scheme
	if `weightnum'==0 {
		local weightdesc ""
	}
	if `weightnum'==1 {
		local weightdesc "Observations are weighted by the inverse probability of their inclusion in the sample in 1997 given the sampling design of the CDS. All variables that are indices or standardized are normalized relative to the weighted distribution." 
	}
	if `weightnum'==2 {
		local weightdesc "Observations are weighted by the inverse probability of their inclusion in the sample in 1997 given the sampling design of the CDS, adjusted to account for attrition from the analysis sample. All controls that are indices are normalized relative to the weighted distribution."
	}
	if `weightnum'==3 {
		local weightdesc "Observations are weighted so that the analysis sample matches the racial makeup of the US population in the 1990 census and so that the distribution of math percentile scores is uniform by decile, and the distribution of income is uniform by quartile. All controls that are indices are normalized relative to the weighted distribution."
	}
	
	* Extra sentence for panel outcomes. The original tested an undefined macro
	* (datatypelist), so the sentence was never set, and it was never written to the notes.
	* It carries its own leading space so that non-panel output is unchanged.
	if strpos("`datatype'", "panel")>0 {
	    local paneldesc " Basic controls also include year fixed effects when the outcome is observed in a panel."
	}	
	    
	* a handle left open by an earlier aborted run would make file open fail
	capture file close fh
    file open fh using "`outfolder'/table_main_`outgroup'_w`weightnum'_`subj'.tex", write replace
	
	* two columns: specification 1 (basic controls) and 2 (added background controls)
	local Cs "c c "
	local colnums "& (1) & (2) "
	local specs "1 2"
	
	
	file write fh "\begin{tabular}{l `Cs'}" _n ///
	"\hline \hline" _n ///
	" `colnums' \\ " _n ///
	"\cline{2-3}" _n 
	
	foreach var in `varlist' {
		* reload for every row: set_up_data generates variables and cannot be run twice
		use "`datafolder'/CDS_TAS_PSID_analysis_`datatype'.dta", clear
		set_up_data, w(`weightnum') s(`subj')
		
		local format "%4.3f"
		local label: variable label later_`var'
		
		* main indepvar
		local confindepvar `var'
		
		* suffix of the locals that hold the confidence coefficient
		local v cnf
		
		local col 1							
		foreach s in `specs' {
			
			* control set of this specification, defined outside this program
			local controls ${spec`s'_w`weightnum'}
	 
			* run regression
			reg later_`var' `confindepvar' `controls' `extracontrols' [pweight = `weightvar'] if confidence_sample==1, cluster($cluster)
			
			local N`col' = `e(N)'
			local df`col' = `e(df_r)'
			
			* weighted mean of the outcome in the estimation sample
			sum later_`var' [aweight = `weightvar'] if e(sample)==1
			local mean`col' = string(`r(mean)', "`format'")

			* save coefficients; a coefficient of exactly zero is reported as "--"
			if strpos("`e(cmdline)'", "`confindepvar'") > 0 & _b[`confindepvar']!=0 {
				* Stars come from the unrounded estimates. Using the 3-decimal display
				* strings can flip stars near a threshold and turns small estimates
				* into 0.000/0.000, i.e. a missing p-value and no stars.
				local braw = _b[`confindepvar']
				local seraw = _se[`confindepvar']
				get_pval, b(`braw') se(`seraw') df(`df`col'')
				local star`col'_`v' "`r(star)'"
				
				local b`col'_`v' = string(_b[`confindepvar'], "`format'")
				local se`col'_`v' = string(_se[`confindepvar'], "`format'")
				local bstar`col'_`v' "`b`col'_`v''`star`col'_`v''"
				local se`col'_`v' = "(" + "`se`col'_`v''" + ")"
			}
			else {
				local bstar`col'_`v' "--"
				local se`col'_`v' = ""
			}
			
			local col = `col'+1  
		}

		
		* write rows to table (N and sample mean are reported for column 1 only)
		file write fh " \hspace{10pt} `Subject' `label' &  `bstar1_`v'' & `bstar2_`v''   \\" _n
		file write fh " \hspace{10pt}   & `se1_`v'' & `se2_`v''  \\" _n 
		file write fh "\hspace{20pt} N & `N1' &    \\ " _n
		file write fh "\hspace{20pt} Sample mean & `mean1' &    \\ \\" _n
	}
	
	
	file write fh "Basic controls: 				   & \checkmark  & \checkmark   	\\" _n
	file write fh "Added background controls:      & 		    & \checkmark    \\" _n



	file write fh "\hline \hline " _n 
	file write fh "\end{tabular}" _n
	file write fh "\begin{tabular}{ p{6in} }" _n 
	file write fh "\footnotesize" _n 
	
	* Table notes: the long version for the unweighted math table, a short version otherwise.
	* The star legend states the thresholds in percent (10, 5, 1), matching get_pval.
	* NOTE(review): the long note describes a third and a fourth row; confirm that this
	* matches the variables actually passed in varlist().
	#delimit ;
	
	if  "`subj'"=="math" & `weightnum'==0 {;
		local notes "Notes:  This table regresses `description' outcomes on various definitions of childhood `subject' confidence with various 
		controls. Adolescent confidence is measured five years after the childhood measurement. In each row, the dependent variable is the adolescent measurement of the independent variable described. The measures of over- and under-confidence are our main binary measures. Our secondary measure of degrees of confidence takes on values from -6 to 6 and persistence of that variable is shown in the third row. The fourth row standardizes the degrees of confidence measure to have mean 0 and standard deviation 1, to facilitate ease of interpretation. 
		All controls that are time-variant are observed in the same year as the confidence measures. 
		Basic controls  include child gender, race, decile fixed effects for math and reading test percentile 
		scores, digit span test scores, a general confidence index, family taxable income and its square, parent education, 
		quarter-of-birth fixed effects, year-of-birth fixed effects, age at which confidence was measured fixed effects, 
		and state fixed effects.`paneldesc' `extracontrolsnote'   Added background controls are parents' rating of child health, 
		indicators for receiving government transfers, household structure, parenting practices, parent occupation,
		and parent mental health and confidence measures. All controls are recoded to zero if missing and we include a missing indicator. 
	`weightdesc' Standard errors are clustered by family, and included in parentheses below each 
	estimate. *, **, and *** indicate significance at the 10, 5, and 1 percent level, respectively. ";
	};
	
	else {;
		local notes "Notes: This table regresses `description' outcomes on childhood `subject' confidence with various controls.  All controls are the same as described in Table 1.`paneldesc' `weightdesc' Standard errors are clustered by family, and included in parentheses below each 
	estimate. *, **, and *** indicate significance at the 10, 5, and 1 percent level, respectively.";
	};
    file write fh "`notes'" _n ;
	#delimit cr
    file write fh "\end{tabular}" _n 
    file close fh 
end





********************************************************************************
// Run regressions and write tables


#delimit ;

* Settings for Table 1: math measures with weight number 0;
local subj math;
local weightnum 0;
local w "_w`weightnum'";

* Table 1: persistence of the childhood confidence measures into adolescence;
write_ols_reg_table,
	subj("`subj'")
	weightnum("`weightnum'")
	varlist(`subj'_overcnf `subj'_undrcnf  z_`subj'_confidence`w'   )
	datatype( 1year)
	datafolder("`CLEAN'")
	outfolder("`OUT'")
	outgroup(adolescent_confidence)
	description("adolescent confidence")
	extracontrols($extra_testscores )
	extracontrolsnote("We also include fixed effects for adolescent test score deciles in math and reading.") ;



